**----------------------------------------------------------------------------**
**--Local Corruption & Household Business Tax Compliance
**--Duong Le, Eddy Malesky, Anh Pham
**--Journal of Economic Behavior and Organization (July 2020)
**----------------------------------------------------------------------------**
**--Do File: Producing all Figures in the paper
**----------------------------------------------------------------------------**

* Session setup: reset Stata, turn off output paging, and define the project
* paths that the rest of this do-file relies on.
clear all
set more off
set matsize 11000

* Specify the main directory that stores all replication files.
global main_directory = "insert your working directory here"

* Data directory: every -use- below refers to files relative to this folder.
* Forward slashes are used because Stata accepts them on Windows, macOS and
* Unix alike, and the rest of this file already builds paths with "/".
cd "$main_directory/data"

* Folder that receives all result outputs (graphs and exported figures).
global result "$main_directory/result"

* Create the result folder when it does not exist yet; -capture- suppresses
* the error raised when the folder is already there.
capture mkdir "$result"

**----------------------------------------------------------------------------**
**---Figure 3: Marginal effect of corruption on tax registration (by firm visibility)
**----------------------------------------------------------------------------**
* Load the firm/commune-pair dataset used for the histograms and the
* marginal-effects regressions below. -use- takes the option -clear-
* (not -replace-) to discard whatever data are currently in memory.
use "firm_commune_pair.dta", clear

* Variable lists shared by the two regressions below. The macro contents are
* plain variable names and are expanded verbatim inside the -reghdfe- calls.
* Index variables entered alongside pciICindex_1 in the regressions.
global C_governance="pciECindex_1 pciLAindex_1 pciTindex_1 pciTCindex_1 pciBindex_1 pciPindex_1 pciBSSindex_1 pciLTindex_1 pciLIindex_1"
* Area, boundary length and coordinates.
global C_GIS_ctrls = "area_km2 boundary_km lat lon"
* Respondent-level controls; educ and location enter as factor variables.
global X2_ctrls= "age sex i.educ i.location"
* Policy controls.
global C_policy_ctrls = "ezone_c izone_c taxreduc"
* Geographic and climate controls.
global geo=" elev river suit_rice suit_tea suit_coffee suit_coconut suit_sugar slope tem precip"
* Controls built from formal-sector aggregates.
* NOTE(review): the regressions also filter on a dataset variable named
* econ_formal, which is distinct from this global macro.
global econ_formal= "nfirmf ld_endme ld_endm ts_asset_endme ts_asset_endm kqkd_rev_busme kqkd_rev_busm kqkd_prof_bftaxme kqkd_prof_bftaxm"

	
 **---Histogram---Employment
* Frequency histogram of worker_total_99 with a normal-density overlay.
* The graph is saved as a .gph file so that it can be reused by the
* -graph combine- step later in this do-file.
histogram worker_total_99, frequency normal ///
    title("") legend(off) ///
    graphregion(color(white)) ///
    xtitle("Panel A2: Visibility by [Number of workers]", size(small)) ///
    xlabel(#6, labsize(small)) ///
    ytitle("Number of Businesses", size(small)) ///
    ylabel(#3, labsize(small))

graph save "$result/histogram_employment.gph", replace


**---Histogram---Working Hours
* Frequency histogram of operation_hr with a normal-density overlay.
* The graph is saved as a .gph file so that it can be reused by the
* -graph combine- step later in this do-file.
histogram operation_hr, frequency normal ///
    title("") legend(off) ///
    graphregion(color(white)) ///
    xtitle("Panel B2: Visibility by [Operating Hours]", size(small)) ///
    xlabel(#14, labsize(small)) ///
    ytitle("Number of Businesses", size(small)) ///
    ylabel(#3, labsize(small))

graph save "$result/histogram_hours.gph", replace


* Marginal-effects plots
* ---Employment - tax-ID possession
*
* Regress taxid on pciICindex_1 interacted with worker_total_99 plus the
* control lists held in the global macros, absorbing com_pairID and ind3
* fixed effects and clustering on provinceID_2017 and borderID.
* The sample keeps observations with non-missing thudn1 above 100000 and
* econ_formal equal to 1.
* NOTE(review): econ_formal in the if-condition is a dataset variable, not
* the global macro of the same name; confirm it exists in the data.
reghdfe taxid $C_governance c.pciICindex_1##c.worker_total_99 ///
    $X2_ctrls $C_policy_ctrls $C_GIS_ctrls light16_mean $geo $econ_formal ///
    if thudn1 > 100000 & !missing(thudn1) & econ_formal == 1, ///
    absorb(com_pairID ind3) ///
    cluster(provinceID_2017 borderID)

* Marginal effect of pciICindex_1 evaluated at 1 through 6 workers.
margins, dydx(pciICindex_1) at(worker_total_99 = (1(1)6))

* Plot the marginal effects with 95% intervals and a reference line at zero.
marginsplot, level(95) ///
    title("") legend(off) ///
    graphregion(color(white)) ///
    yline(0, lpattern(longdash_dot) lwidth(vthin) lcolor(red)) ///
    ytitle("Marginal Effects of Corruption", size(small)) ///
    xtitle("Panel A1: Visibility by [Number of workers]", size(small)) ///
    xlabel(#6, labsize(small))

graph save "$result/marginPlot_employment.gph", replace
 
* ---Working Hours - tax-ID possession
*
* Same specification as the employment regression, except that pciICindex_1
* is interacted with operation_hr instead of worker_total_99.
* NOTE(review): econ_formal in the if-condition is a dataset variable, not
* the global macro of the same name; confirm it exists in the data.
reghdfe taxid $C_governance c.pciICindex_1##c.operation_hr ///
    $X2_ctrls $C_policy_ctrls $C_GIS_ctrls light16_mean $geo $econ_formal ///
    if thudn1 > 100000 & !missing(thudn1) & econ_formal == 1, ///
    absorb(com_pairID ind3) ///
    cluster(provinceID_2017 borderID)

* Marginal effect of pciICindex_1 evaluated at every hour from 0 to 24.
margins, dydx(pciICindex_1) at(operation_hr = (0(1)24))

* Plot the marginal effects with 95% intervals and a reference line at zero.
marginsplot, level(95) ///
    title("") legend(off) ///
    graphregion(color(white)) ///
    yline(0, lpattern(longdash_dot) lwidth(vthin) lcolor(red)) ///
    ytitle("Marginal Effects of Corruption", size(small)) ///
    xtitle("Panel B1: Visibility by [Operating Hours]", size(small)) ///
    xlabel(#23, labsize(small))

graph save "$result/marginPlot_hours.gph", replace


* Combine the four saved panels into one figure: the two marginal-effects
* plots are listed first, followed by the two histograms. The combined graph
* is stored as Figure3.gph and then exported to PNG.
graph combine ///
    "$result/marginPlot_employment.gph" ///
    "$result/marginPlot_hours.gph" ///
    "$result/histogram_employment.gph" ///
    "$result/histogram_hours.gph", ///
    rows(2) cols(2) ///
    graphregion(fcolor(white) ifcolor(white)) ///
    saving("$result/Figure3.gph", replace)

graph export "$result/Figure3.png", as(png) replace


**----------------------------------------------------------------------------**
**---Figure 4: Distribution of Revenue Around the 100 Million VND Threshold
**----------------------------------------------------------------------------**

* Load the VHBS 2017 data (original dataset after the IDs were created).
use "VHBS_2017.dta", clear
label variable thudn1 "Revenue in the first 6 months"

* Forecasted annual revenue: first-six-month revenue plus thudk1.
generate thuy1 = thudn1 + thudk1
label variable thuy1 "Forecasted annual revenue"

* Figure 4a: distribution of first-six-month revenue (thudn1) around the
* threshold, restricted to the window 90000 to 110000.

* Number of observations inside the window (missing outside it).
egen c90110 = total(1) if inrange(thudn1, 90000, 110000)

* Bin index for bins of width 500; only defined inside the window.
generate i500 = ceil(thudn1/500) if inrange(thudn1, 90000, 110000)

* Number of observations in each bin.
egen i500_2 = total(1), by(i500)

* Share of the window's observations that fall in each bin.
generate r_thudn1 = i500_2/c90110
label variable r_thudn1 "fraction"

* Upper edge of each bin, expressed in the units of thudn1.
generate i500_3 = i500*500
label variable i500_3 "Revenue in the first 6 months"

* Order observations by bin so the connected line runs left to right.
sort i500_3

* Plot the bin shares with a vertical line at 100000, the value the section
* header describes as the 100 million VND threshold.
* NOTE(review): this implies revenue is recorded in thousand VND; confirm.
twoway connected r_thudn1 i500_3 if i500_3 > 90000, ///
    xline(100000) ///
    yscale(r(0 (0.02) 0.16)) ///
    ylabel(0 (0.02) 0.16) ///
    graphregion(fcolor(white) ifcolor(white)) ///
    plotregion(fcolor(white) ifcolor(white))

graph export "$result/Figure4a.png", replace


** Figure 4b: distribution of forecasted annual revenue (thuy1) around the
** threshold, restricted to the window 90000 to 110000. The vertical line at
** 100000 is the value the section header describes as the 100 million VND
** threshold.
** NOTE(review): this implies revenue is recorded in thousand VND; confirm.

* Number of observations inside the window (missing outside it).
egen c90110_dk = total(1) if thuy1 >= 90000 & thuy1 <= 110000

* Bin index for bins of width 500; only defined inside the window.
g i500_dk = ceil(thuy1/500) if thuy1 >= 90000 & thuy1 <= 110000

* Number of observations in each bin.
egen i500_2dk = total(1), by(i500_dk)

* Share of the window's observations that fall in each bin.
g r_thuy1 = i500_2dk/c90110_dk

* Upper edge of each bin, expressed in the units of thuy1.
g i500_3dk = i500_dk*500

label var r_thuy1 "fraction"

* This label becomes the x-axis title of the figure; the spelling now matches
* the label given to thuy1 ("Forecasted annual revenue").
label var i500_3dk "Forecasted annual revenue"

* Order observations by bin so the connected line runs left to right.
sort i500_3dk

* graph
twoway connected r_thuy1 i500_3dk if i500_3dk > 90000, xline(100000) ///
    yscale(r(0 (0.02) 0.16)) ylabel(0 (0.02) 0.16) ///
    graphregion(fcolor(white) ifcolor(white)) plotregion(fcolor(white) ifcolor(white))
graph export "$result/Figure4b.png", replace

 
 
 

